-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[AArch64] Add sve bf16 fpext and fpround costs. #150485
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-backend-aarch64 Author: David Green (davemgreen) ChangesThis prevents them from generating Invalid costs, as generating the instructions seems to work fine with and without +bf16. The costs are mostly taken from the number of instructions (minus ptrue and constants) from https://llvm.godbolt.org/z/16zMd47he Full diff: https://github.com/llvm/llvm-project/pull/150485.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 40f49dade6131..ba00d9e4dbb99 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3093,6 +3093,12 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 2}, // bfcvtn+fcvtn
{ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 3}, // fcvtn+fcvtl2+bfcvtn
{ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 6}, // 2 * fcvtn+fcvtn2+bfcvtn
+ {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f32, 1}, // bfcvt
+ {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f32, 1}, // bfcvt
+ {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f32, 3}, // bfcvt+bfcvt+uzp1
+ {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f64, 2}, // fcvtx+bfcvt
+ {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f64, 5}, // fcvtx+bfcvt+bfcvt+uzp1
+ {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f64, 11}, // 4*fcvt+4*bfcvt+3*uzp
};
if (ST->hasBF16())
@@ -3501,11 +3507,21 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f32, 1},
{ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f32, 3},
+ // Truncate from nxvmf32 to nxvmbf16.
+ {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f32, 8},
+ {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f32, 8},
+ {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f32, 17},
+
// Truncate from nxvmf64 to nxvmf16.
{ISD::FP_ROUND, MVT::nxv2f16, MVT::nxv2f64, 1},
{ISD::FP_ROUND, MVT::nxv4f16, MVT::nxv4f64, 3},
{ISD::FP_ROUND, MVT::nxv8f16, MVT::nxv8f64, 7},
+ // Truncate from nxvmf64 to nxvmbf16.
+ {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f64, 9},
+ {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f64, 19},
+ {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f64, 39},
+
// Truncate from nxvmf64 to nxvmf32.
{ISD::FP_ROUND, MVT::nxv2f32, MVT::nxv2f64, 1},
{ISD::FP_ROUND, MVT::nxv4f32, MVT::nxv4f64, 3},
@@ -3516,11 +3532,22 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4f16, 1},
{ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8f16, 2},
+ // Extend from nxvmbf16 to nxvmf32.
+ {ISD::FP_EXTEND, MVT::nxv2f32, MVT::nxv2bf16, 1}, // lsl
+ {ISD::FP_EXTEND, MVT::nxv4f32, MVT::nxv4bf16, 1}, // lsl
+ {ISD::FP_EXTEND, MVT::nxv8f32, MVT::nxv8bf16, 2}, // unpck+unpck+lsl+lsl
+
// Extend from nxvmf16 to nxvmf64.
{ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f16, 1},
{ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f16, 2},
{ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f16, 4},
+ // Extend from nxvmbf16 to nxvmf64.
+ {ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2bf16, 2}, // lsl+fcvt
+ {ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4bf16, 6}, // unpck+unpck+lsl+lsl
+ // + fcvt+fcvt
+ {ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8bf16, 14}, // 6*unpck+4*lsl+4*fcvt
+
// Extend from nxvmf32 to nxvmf64.
{ISD::FP_EXTEND, MVT::nxv2f64, MVT::nxv2f32, 1},
{ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll
index 805b3713830ab..ddc1a72e16318 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-fpext.ll
@@ -34,12 +34,12 @@ define void @sve_fpext() {
define void @sve_fpext_bf16() {
; CHECK-LABEL: 'sve_fpext_bf16'
-; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_to_f32 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x float>
-; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f32 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x float>
-; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f32 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x float>
-; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_to_f64 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x double>
-; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f64 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x double>
-; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f64 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x double>
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %nxv2_f16_to_f32 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x float>
+; CHECK-NEXT: Cost Model: Found costs of 1 for: %nxv4_f16_to_f32 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x float>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f32 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x float>
+; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_to_f64 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x double>
+; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_to_f64 = fpext <vscale x 4 x bfloat> undef to <vscale x 4 x double>
+; CHECK-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_to_f64 = fpext <vscale x 8 x bfloat> undef to <vscale x 8 x double>
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%nxv2_f16_to_f32 = fpext <vscale x 2 x bfloat> undef to <vscale x 2 x float>
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll b/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll
index bb31ebfbed4ba..88c53ec4510b1 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-fptrunc.ll
@@ -34,14 +34,23 @@ define void @sve_fptruncs() {
}
define void @sve_fptruncs_bf16() {
-; CHECK-LABEL: 'sve_fptruncs_bf16'
-; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>
-; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat>
-; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x bfloat>
-; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x bfloat>
-; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x bfloat>
-; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x bfloat>
-; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; CHECK-NOBF16-LABEL: 'sve_fptruncs_bf16'
+; CHECK-NOBF16-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>
+; CHECK-NOBF16-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat>
+; CHECK-NOBF16-NEXT: Cost Model: Found costs of RThru:17 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x bfloat>
+; CHECK-NOBF16-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x bfloat>
+; CHECK-NOBF16-NEXT: Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x bfloat>
+; CHECK-NOBF16-NEXT: Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x bfloat>
+; CHECK-NOBF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-BF16-LABEL: 'sve_fptruncs_bf16'
+; CHECK-BF16-NEXT: Cost Model: Found costs of 1 for: %nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>
+; CHECK-BF16-NEXT: Cost Model: Found costs of 1 for: %nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat>
+; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f32 = fptrunc <vscale x 8 x float> undef to <vscale x 8 x bfloat>
+; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %nxv2_f16_from_f64 = fptrunc <vscale x 2 x double> undef to <vscale x 2 x bfloat>
+; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %nxv4_f16_from_f64 = fptrunc <vscale x 4 x double> undef to <vscale x 4 x bfloat>
+; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:1 SizeLat:1 for: %nxv8_f16_from_f64 = fptrunc <vscale x 8 x double> undef to <vscale x 8 x bfloat>
+; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%nxv2_f16_from_f32 = fptrunc <vscale x 2 x float> undef to <vscale x 2 x bfloat>
%nxv4_f16_from_f32 = fptrunc <vscale x 4 x float> undef to <vscale x 4 x bfloat>
@@ -53,6 +62,3 @@ define void @sve_fptruncs_bf16() {
ret void
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-BF16: {{.*}}
-; CHECK-NOBF16: {{.*}}
|
This comment was marked as off-topic.
This comment was marked as off-topic.
4155e87
to
ef71c66
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Some nits but otherwise looks good to me.
This prevents them from generating Invalid costs, as generating the instructions seems to work fine with and without +bf16. The costs are mostly taken from the number of instructions (minus ptrue and constants) from https://llvm.godbolt.org/z/16zMd47he
ef71c66
to
25f5263
Compare
This prevents them from generating Invalid costs, as generating the instructions seems to work fine with and without +bf16. The costs are mostly taken from the number of instructions (minus ptrue and constants).
This prevents them from generating Invalid costs, as generating the instructions seems to work fine with and without +bf16. The costs are mostly taken from the number of instructions (minus ptrue and constants) from https://llvm.godbolt.org/z/16zMd47he